########################
# Merge Estimates with Covariates


# This script loads the model estimates and merges them with several data sources:
# a) Comparative Study of Electoral Systems (CSES) surveys
# b) Manifesto data
# c) GDP growth
#
# The resulting dataset is saved in "Merge_est_covar.Rds"


# Create Dataset
library(stringr)
library(readstata13)
library(lubridate)


#----------------------
# Read data
#----------------------

## Model output
dat <- readRDS("res_party.RDS")

## Keep only the rows whose parameter name contains "nu" and, of those,
## only the point estimate plus the country / year / party identifiers.
is_nu_row <- str_detect(dat$par, "nu")
kept_columns <- c("mean", "ctry_nam", "year", "pid")
dat <- dat[is_nu_row, kept_columns]


## Create a file with the ambiguity and position estimates side by side:
## one row per party-year, one set of summary columns per parameter.
dat_res <- readRDS("res_party.RDS")

# Strip everything from the first "[" onwards so only the bare parameter
# name is left in `par`.
dat_res$par <- sub('\\[.*', '', dat_res$par)

summary_stats <- c("mean", "sd", "2.5%", "97.5%")
dat_res <- dat_res[, c("ctry_nam", "year", "pid", "pid_nam", summary_stats, "par")]

dat_res <- reshape(
  dat_res,
  direction = "wide",
  idvar = c("pid", "pid_nam", "ctry_nam", "year"),
  timevar = "par",
  v.names = summary_stats
)

# NOTE(review): the renaming below is positional. It assumes exactly two
# distinct parameter names and that the ambiguity parameter is the first one
# encountered in the file - confirm against res_party.RDS.
names(dat_res) <- c(
  "ctry_nam", "year", "pid", "pid_nam",
  paste0("amb_", c("mean", "sd", "ci2.5", "ci97.5")),
  paste0("pos_", c("mean", "sd", "ci2.5", "ci97.5"))
)
write.csv(dat_res, file = "NyhuisStoetzer_AmbguityEstimates_CHES_2006-17.csv")

## Chapel Hill (CHES) party means, restricted to the three waves used here
ches_waves <- c("2006", "2010", "2014")
means <- suppressWarnings(read.dta13("1999-2014_CHES_dataset_means.dta"))
means <- means[means$year %in% ches_waves, ]

## Merge: inner join of the estimates with the CHES means on wave and party id
## (estimates without a CHES row are dropped).
dat <- merge(
  x = dat,
  y = means,
  by.x = c("year", "pid"),
  by.y = c("year", "party_id")
)

## CSES survey data (presumably modules 2 to 4 - confirm). Each .rdata file is
## expected to provide an object named after the file: cses2, cses3, cses4.
load("cses2.rdata")
load("cses3.rdata")
load("cses4.rdata")

# Lower-case all variable names so the columns can be addressed uniformly below
names(cses2) <- tolower(names(cses2))
names(cses3) <- tolower(names(cses3))
names(cses4) <- tolower(names(cses4))

## Comparison of CSES and CHES: one file per CHES wave. The columns used
## further down are pid, party_cses, cses_country and election_year_cses.

# Read one link file, lower-case the CSES party letter and drop the rows
# that have no CSES party letter.
read_cses_ches <- function(path) {
  link <- read.csv(path, stringsAsFactors = FALSE)
  link$party_cses <- tolower(link$party_cses)
  link[!is.na(link$party_cses), ]
}

cses_ches_06 <- read_cses_ches("means_2006.csv")
cses_ches_10 <- read_cses_ches("means_2010.csv")
cses_ches_14 <- read_cses_ches("means_2014.csv")

## Manifesto data, read twice from the same file: `manifesto` with the
## read.dta13 default factor conversion, `manifesto_num` with the raw codes.
manifesto <- suppressWarnings(read.dta13("MPDataset_MPDS2016a.dta"))
manifesto_num <- read.dta13(
  "MPDataset_MPDS2016a.dta",
  convert.factors = FALSE
)

# Calendar year of the election date, added to both versions
manifesto$election_year <- year(manifesto$edate)
manifesto_num$election_year <- year(manifesto_num$edate)

## GDP growth, joined by country and year. This is an inner join, so
## party-years without a GDP row are dropped.
gdp <- read.csv("gdp_growth.csv")

dat <- merge(x = dat, y = gdp, by = c("ctry_nam", "year"))

#----------------------
# Recode
#----------------------

## Vote results
## For every party in the 2006 CHES/CSES link file, find the manifesto
## election(s) closest to, but strictly before, the CSES election year and
## store that election's `pervote` value (presumably the vote share in
## percent - confirm) as the party's previous result.
previous_election_result <- data.frame(
  cmp_id = numeric(),
  election = numeric(),
  country = character(),
  result = numeric()
)

# One slot per link-file row. The rows are bound once after the loop instead
# of growing the data frame with rbind() on every iteration.
found_results <- vector("list", nrow(cses_ches_06))

for (i in seq_len(nrow(cses_ches_06))) {

  # Manifesto party code of this CHES party: first matching row in the CHES
  # means. which() ignores NA comparisons, so an NA party id cannot mask a
  # real match.
  cmp_id <- means$cmp_id[which(means$party_id == cses_ches_06$pid[i])[1]]
  if (is.na(cmp_id)) {
    next
  }

  # manifesto and manifesto_num are read from the same file, so the numeric
  # party code of manifesto_num can index the rows of manifesto.
  party_rows <- which(
    manifesto$countryname == cses_ches_06$cses_country[i] &
      manifesto_num$party == cmp_id
  )
  tmp_manifesto <- manifesto[party_rows, ]

  # Years between each manifesto election and the CSES election. Only
  # elections strictly before the CSES election qualify, and elections with
  # an unknown year are ignored rather than invalidating the whole party.
  years_before <- cses_ches_06$election_year_cses[i] - tmp_manifesto$election_year
  earlier <- which(years_before > 0)
  if (length(earlier) == 0) {
    next
  }

  # Keep every row tied for the smallest positive gap
  election <- earlier[years_before[earlier] == min(years_before[earlier])]

  found_results[[i]] <- data.frame(
    cmp_id = cmp_id,
    election = tmp_manifesto$election_year[election],
    country = cses_ches_06$cses_country[i],
    result = tmp_manifesto$pervote[election]
  )
}

previous_election_result <- do.call(
  rbind,
  c(list(previous_election_result), Filter(Negate(is.null), found_results))
)

## Lookup from manifesto party code (cmp_id) to CHES party id (pid), with one
## row per pid. Rows without a cmp_id are removed BEFORE de-duplicating:
## otherwise a party whose first row has no cmp_id would be lost even when a
## later row of the same party carries one.
dat_ids <- dat[!is.na(dat$cmp_id), c("cmp_id", "pid")]
dat_ids <- dat_ids[!duplicated(dat_ids$pid), ]

# Attach the CHES party id to the previous election results (inner join)
previous_election_result <- merge(previous_election_result, dat_ids, by = "cmp_id")

## Delta vote
## Change in `vote` relative to the party's previous observation:
##   2006 wave -> result stored in previous_election_result
##   2010 wave -> vote of the same party in the 2006 wave
##   2014 wave -> vote of the same party in the 2010 wave
## Rows without an earlier observation (or from any other year) stay NA.
## When a party-wave occurs more than once, the first occurrence is used.

wave <- as.character(dat$year)

# Wave preceding each row's wave; NA for 2006 and for any other year
earlier_wave <- unname(c("2010" = "2006", "2014" = "2010")[wave])

# Row index of the same party in the preceding wave (NA if there is none)
party_wave <- paste(dat$pid, wave, sep = "_")
earlier_row <- match(paste(dat$pid, earlier_wave, sep = "_"), party_wave)
earlier_row[is.na(earlier_wave) | is.na(dat$pid)] <- NA

dat$delta_vote <- dat$vote - dat$vote[earlier_row]

# 2006 wave: compare with the result of the preceding election
first_wave <- which(wave == "2006")
baseline_row <- match(dat$pid[first_wave], previous_election_result$pid)
has_baseline <- !is.na(baseline_row) & !is.na(dat$pid[first_wave])

dat$delta_vote[first_wave[has_baseline]] <-
  dat$vote[first_wave[has_baseline]] -
  previous_election_result$result[baseline_row[has_baseline]]

## Delta ambig (our estimate)
## Change in the estimate `mean` relative to the same party in the preceding
## wave (2010 vs 2006, 2014 vs 2010). Rows from 2006, from any other year, or
## without an observation in the preceding wave stay NA. When a party-wave
## occurs more than once, the first occurrence is used.

wave <- as.character(dat$year)

# Wave preceding each row's wave; NA for 2006 and for any other year
earlier_wave <- unname(c("2010" = "2006", "2014" = "2010")[wave])

# Row index of the same party in the preceding wave (NA if there is none)
party_wave <- paste(dat$pid, wave, sep = "_")
earlier_row <- match(paste(dat$pid, earlier_wave, sep = "_"), party_wave)
earlier_row[is.na(earlier_wave) | is.na(dat$pid)] <- NA

dat$delta_ambig <- dat$mean - dat$mean[earlier_row]

## PID as character

dat$pid_char <- as.character(dat$pid)

## Government participation
## Two 0/1 codings of `govt` that differ only in the ".5" category:
## govt_bin counts ".5" as 1, govt_bin_cons counts it as 0.
## NOTE(review): a missing or otherwise unlisted `govt` value becomes 0 in
## govt_bin but 1 in govt_bin_cons - confirm this is intended.

dat$govt_bin <- as.numeric(dat$govt %in% c(".5", "in government"))

dat$govt_bin_cons <- as.numeric(!(dat$govt %in% c(".5", "not in government")))

## Moderation
## Harmonise the three CSES data sets: common `country` and `year` columns,
## the self-placement as `lr_self`, and the placements of parties a to i as
## `lr_party_a` ... `lr_party_i`.

# Set every value above 10 to NA (presumably the CSES missing / refusal
# codes on the 0-10 left-right scale - confirm against the codebook).
blank_above_ten <- function(x) {
  x[x > 10] <- NA
  x
}

cses2$country <- cses2$b1006_nam
cses2$year <- cses2$b1008
cses2$lr_self <- blank_above_ten(cses2$b3045)

for (party in letters[1:9]) {
  cses2[, str_c("lr_party_", party)] <- blank_above_ten(cses2[, str_c("b3038_", party)])
}

cses3$country <- cses3$c1006_nam
cses3$year <- cses3$c1008
cses3$lr_self <- blank_above_ten(cses3$c3013)

for (party in letters[1:9]) {
  cses3[, str_c("lr_party_", party)] <- blank_above_ten(cses3[, str_c("c3011_", party)])
}

cses4$country <- cses4$d1006_nam
cses4$year <- cses4$d1008
cses4$lr_self <- blank_above_ten(cses4$d3014)

for (party in letters[1:9]) {
  cses4[, str_c("lr_party_", party)] <- blank_above_ten(cses4[, str_c("d3013_", party)])
}

## Inputs for the moderation score, filled for the 2010 and 2014 waves only
## and only for parties present in the link files of both the wave itself and
## the preceding wave:
##   preference_mean           mean self-placement in the wave's CSES study
##   party_perception_current  mean placement of the party in that study
##   party_perception_previous mean placement of the party in the CSES study
##                             linked to the preceding wave
dat$preference_mean <- rep(NA_real_, nrow(dat))
dat$party_perception_current <- rep(NA_real_, nrow(dat))
dat$party_perception_previous <- rep(NA_real_, nrow(dat))

# Mean of a left-right item among the respondents of the CSES study
# (country and election year) that `link_file` associates with `party`.
#   item = "self"  -> respondents' own placement (lr_self)
#   item = "party" -> respondents' placement of the party (lr_party_<letter>)
# If the party occurs several times in the link file, the first row is used.
cses_study_mean <- function(survey, link_file, party, item = c("self", "party")) {
  item <- match.arg(item)
  link <- which(link_file$pid == party)[1]
  in_study <- survey$country == link_file$cses_country[link] &
    survey$year == link_file$election_year_cses[link]
  answers <- if (item == "self") {
    survey$lr_self
  } else {
    survey[, str_c("lr_party_", link_file$party_cses[link])]
  }
  mean(answers[in_study], na.rm = TRUE)
}

# Survey and link file of each wave and of the wave before it
wave_sources <- list(
  "2010" = list(
    survey = cses3, link_file = cses_ches_10,
    prev_survey = cses2, prev_link_file = cses_ches_06
  ),
  "2014" = list(
    survey = cses4, link_file = cses_ches_14,
    prev_survey = cses3, prev_link_file = cses_ches_10
  )
)

for (i in seq_len(nrow(dat))) {
  # Progress output every tenth row
  if (i %% 10 == 0) {
    cat(i, "\n")
  }

  wave <- as.character(dat$year[i])
  if (is.na(wave) || !(wave %in% names(wave_sources))) {
    next
  }
  src <- wave_sources[[wave]]

  party <- dat$pid[i]
  if (!(party %in% src$link_file$pid && party %in% src$prev_link_file$pid)) {
    next
  }

  ## Mean position of voters (current election)
  dat$preference_mean[i] <- cses_study_mean(
    src$survey, src$link_file, party, item = "self"
  )
  ## Mean perception of the party, current election
  dat$party_perception_current[i] <- cses_study_mean(
    src$survey, src$link_file, party, item = "party"
  )
  ## Mean perception of the party, previous election
  dat$party_perception_previous[i] <- cses_study_mean(
    src$prev_survey, src$prev_link_file, party, item = "party"
  )
}

## Moderation score
# How much closer the perceived party position is to the mean voter
# preference now than at the previous election (positive = moved closer).
closer_by <- abs(dat$preference_mean - dat$party_perception_previous) -
  abs(dat$preference_mean - dat$party_perception_current)

# Collapse to five steps: +/-2 for a change of at least two scale points,
# +/-1 for any smaller non-zero change, 0 for no change. Missing stays missing.
direction <- closer_by
is_large <- !is.na(closer_by) & abs(closer_by) >= 2
is_small <- !is.na(closer_by) & closer_by != 0 & abs(closer_by) < 2
direction[is_large] <- 2 * sign(closer_by[is_large])
direction[is_small] <- sign(closer_by[is_small])

# Weight the step by the size of the perceived position change
dat$moderation <- direction *
  abs(dat$party_perception_previous - dat$party_perception_current)

# Tag each link file with its CHES wave and stack them into one lookup of
# CSES election year and country per party-wave.
cses_ches_06$year <- "2006"
cses_ches_10$year <- "2010"
cses_ches_14$year <- "2014"

link_columns <- c("pid", "year", "election_year_cses", "cses_country")
cses_ches <- do.call(rbind, list(cses_ches_06, cses_ches_10, cses_ches_14))
cses_ches <- cses_ches[, link_columns]

# Left join: every row of dat is kept, with NA where no link exists
dat <- merge(x = dat, y = cses_ches, by = c("pid", "year"), all.x = TRUE)

# Left join of the manifesto variables of the party's CSES election year
dat <- merge(
  x = dat,
  y = manifesto_num,
  by.x = c("cmp_id", "election_year_cses"),
  by.y = c("party", "election_year"),
  all.x = TRUE
)

## Single-Issue
## Manifesto categories that enter the left-right sum (presumably the
## components of the manifesto RILE index - confirm).

leftright_items <- paste0(
  "per",
  c(
    104, 201, 203, 305, 401, 402, 407, 414, 505, 601, 603, 605, 606,
    103, 105, 106, 107, 403, 404, 406, 412, 413, 504, 506, 701, 202
  )
)

# Row-wise total of the items; NA as soon as one item is missing
dat$sum_rile <- rowSums(dat[, leftright_items])

# NOTE(review): 38.82 is a hard-coded cut-off whose source is not visible
# in this script.
dat$niche_st <- dat$sum_rile < 38.82

## Lagged dependent variable
## delta_vote of the same party in the preceding wave (2010 -> 2006,
## 2014 -> 2010). Rows from 2006, from any other year, or without an
## observation in the preceding wave stay NA. When a party-wave occurs more
## than once, the first occurrence is used.

wave <- as.character(dat$year)

# Wave preceding each row's wave; NA for 2006 and for any other year
earlier_wave <- unname(c("2010" = "2006", "2014" = "2010")[wave])

# Row index of the same party in the preceding wave (NA if there is none)
party_wave <- paste(dat$pid, wave, sep = "_")
earlier_row <- match(paste(dat$pid, earlier_wave, sep = "_"), party_wave)
earlier_row[is.na(earlier_wave) | is.na(dat$pid)] <- NA

dat$ldv <- dat$delta_vote[earlier_row]

## Ambig (ST)
## Standard deviation of the CSES respondents' left-right placements of the
## party, taken from the CSES study that the wave's link file associates
## with the party. Only computed when that study has more than 300 selected
## rows (missing placements count towards the 300) and the party occurs
## exactly once in the link file; otherwise the value stays NA.

dat$ambig_st <- rep(NA_real_, nrow(dat))

# Survey and link file belonging to each CHES wave
ambig_sources <- list(
  "2006" = list(survey = cses2, link_file = cses_ches_06),
  "2010" = list(survey = cses3, link_file = cses_ches_10),
  "2014" = list(survey = cses4, link_file = cses_ches_14)
)

for (i in seq_len(nrow(dat))) {
  wave <- as.character(dat$year[i])
  if (is.na(wave) || !(wave %in% names(ambig_sources))) {
    next
  }
  src <- ambig_sources[[wave]]

  # Row of this party in the link file; skip unmatched or ambiguous parties
  link <- which(src$link_file$pid == dat$pid[i])
  if (length(link) != 1) {
    next
  }

  # Placements of the party by all respondents of the linked study
  placements <- src$survey[
    src$survey$year == src$link_file$election_year_cses[link] &
      src$survey$country == src$link_file$cses_country[link],
    str_c("lr_party_", src$link_file$party_cses[link])
  ]

  if (length(placements) > 300) {
    dat$ambig_st[i] <- sd(placements, na.rm = TRUE)
  }
}

## Delta Ambig (ST)
## Change in ambig_st relative to the same party in the preceding wave
## (2010 vs 2006, 2014 vs 2010). Rows from 2006, from any other year, or
## without an observation in the preceding wave stay NA. When a party-wave
## occurs more than once, the first occurrence is used.

wave <- as.character(dat$year)

# Wave preceding each row's wave; NA for 2006 and for any other year
earlier_wave <- unname(c("2010" = "2006", "2014" = "2010")[wave])

# Row index of the same party in the preceding wave (NA if there is none)
party_wave <- paste(dat$pid, wave, sep = "_")
earlier_row <- match(paste(dat$pid, earlier_wave, sep = "_"), party_wave)
earlier_row[is.na(earlier_wave) | is.na(dat$pid)] <- NA

dat$delta_ambig_st <- dat$ambig_st - dat$ambig_st[earlier_row]



# Write the merged data set to disk
saveRDS(object = dat, file = "Merge_est_covar.Rds")



## 

